/****************************************************************************
** Produces a variety of descriptive statistics from the Econographics dataset *
** for Section 4.1 of the paper												*
** 																			*
** 																			*		
** Produced by Erik Snowberg, October 13, 2022								*
** 		Based on Code by Jonathan Chapman and Erik Snowberg					*
****************************************************************************/

// Load the main Econographics dataset and duplicate every observation, so that
// variables with two measures can later be stacked and summarized with a
// single descriptive statistic.
// stack==1 marks the original observations, stack==2 the duplicated copies.
use "${datapath}econographicsTransformed.dta", clear
// expand's generate() marks originals with 0 and duplicates with 1; shift to 1/2
expand 2, generate(stack)
replace stack = stack + 1

// Remove the pre-computed averages (original note: they are based on FA values
// and would otherwise confuse what follows).
drop avg_*

// Some constructs have two measures stored under unrelated names. Give each a
// common stem with a 1/2 suffix, for both the raw and the FA-prefixed version.
// The two lists below are parallel: word k of newNames aliases word k of oldNames.
local newNames overestimationIQ1 overestimationIQ2 ///
	overPlacementFacts1 overPlacementFacts2 ///
	qualOverPrecise1 qualOverPrecise2
local oldNames overestimationMatrix overestimationRotation ///
	overplacementTele overplacementUnemp ///
	qualitativeOverprecisionTele qualitativeOverprecisionUnemp

local nAliases : word count `newNames'
forvalues k = 1/`nAliases' {
	local alias  : word `k' of `newNames'
	local source : word `k' of `oldNames'
	gen `alias' = `source'
	gen FA`alias' = FA`source'
}

// Re-express several risk measures relative to expected value (EV).

// Gain lotteries: one minus the risk-aversion measure.
forvalues m = 1/2 {
	gen riskGain`m' = 1-riskAversionGain`m'
}

// Loss lotteries: scaled by -2500, shifted by +5000 (original note: 5k is added
// to make these comparable with the gain lotteries), then divided by 2500.
forvalues m = 1/2 {
	gen riskLoss`m' = (((riskAversionLoss`m'+1)*-2500)+5000)/2500
}

// Mixed gain/loss lotteries: the shift and divisor differ between the two
// measures (5000/5000 for measure 1, 6000/4000 for measure 2); the original
// note says the shift places the lottery in the gain domain.
gen riskMixGainLoss1 = (((1-riskAversionMixGainLoss1)*10000)-5000)/5000
gen riskMixGainLoss2 = (((1-riskAversionMixGainLoss2)*10000)-6000)/4000

// Common-ratio questions: rescale the midpoint ("MidP") variables for the sure
// amount and for the lottery; each measure uses its own constants.
gen riskCRSure1    = 0.8*riskAversionCRSure1MidP/2500
gen riskCRSure2    = 0.75*riskAversionCRSure2MidP/4000
gen riskCRLottery1 = 0.2*riskAversionCRLottery1MidP/(0.25*2500)
gen riskCRLottery2 = 0.15*riskAversionCRLottery2MidP/(0.2*4000)

// Redefined common-ratio measure: sure-amount value minus lottery value.
forvalues m = 1/2 {
	gen newCommonRatio`m' = riskCRSure`m'-riskCRLottery`m'
}

// The stacking loop further down correlates FA<stem>1 with FA<stem>2, so the
// newly built variables need FA-prefixed copies as well.
foreach stem in riskGain riskLoss riskMixGainLoss riskCRSure riskCRLottery newCommonRatio {
	forvalues m = 1/2 {
		gen FA`stem'`m' = `stem'`m'
	}
}

// Build the stacked N<stem> variables and store the correlation between the
// two measures of each construct.
// Original notes: the correlation uses the "for analysis" (FA) versions to
// account for non-linearity in the discount rate, and the main variables
// (riskAversionGain etc.) are kept in the list because the directional
// indicators created later are based on them.
local twoMeasureStems ///
	reciprocityHigh reciprocityLow advanInequalityAversion disadInequalityAversion ///
	riskGain riskLoss riskMixGainLoss ///
	WTA WTP endowmentEffect ///
	riskCRSure riskCRLottery newCommonRatio ///
	riskAversionCRLottery riskAversionCRSure commonRatio ///
	riskAversionGain riskAversionLoss riskAversionMixGainLoss ///
	ambiguityAversion compoundAversion overestimationIQ ///
	overPlacementFacts qualOverPrecise discountRate

foreach stem of local twoMeasureStems {
	// Measure 1 fills the first copy of the data, measure 2 the second copy
	gen N`stem' = cond(stack==1, `stem'1, cond(stack==2, `stem'2, .))
	// Weighted correlation between the two measures, computed on one copy only
	quietly correlate FA`stem'1 FA`stem'2 if stack==1 [aweight=weight]
	gen corr`stem' = `r(rho)'
}

// Constructs with a single measure are filled in for the first copy only, so
// they stay missing in the second copy.
foreach single of varlist punishReceiver punishSender trust altruism {
	gen N`single' = `single' if stack==1
}

// Share of observations in the expected direction (e.g., risk averse, or
// showing no effect, since zero counts as "normal" here) versus the opposite
// direction (e.g., risk seeking).
//
// Indicators created for each stacked variable:
//   normGE_<var> = 1 if the value is in the "normal" direction or zero
//   rev_<var>    = 1 if the value is in the "reverse" direction
//
// Stata treats missing values as larger than any number, so an unguarded
// (x >= 0) or (x > 0) evaluates to 1 when x is missing, and (x < 0) or
// (x <= 0) evaluates to 0. Every indicator is therefore restricted to
// non-missing observations; the indicator is left missing otherwise, so the
// weighted means taken in the collapse step are over valid responses only.
foreach var in NriskAversionGain NriskAversionCRLottery NriskAversionCRSure  ///
	NadvanInequalityAversion NdisadInequalityAversion 						///
	NambiguityAversion NcompoundAversion NendowmentEffect NcommonRatio NoverestimationIQ NoverPlacementFacts	{
		gen normGE_`var' = (`var'>=0) if !missing(`var')
		gen rev_`var'    = (`var'<0)  if !missing(`var')
}

// Risk over losses is "normal" if risk loving, so the inequalities flip
gen normGE_NriskAversionLoss = (NriskAversionLoss<=0) if !missing(NriskAversionLoss)
gen rev_NriskAversionLoss    = (NriskAversionLoss>0)  if !missing(NriskAversionLoss)

// Copy the indicators to the names used by the new variable definitions
// (missing indicator values carry over unchanged)
foreach i in Gain Loss CRSure CRLottery {
	gen normGE_Nrisk`i' = normGE_NriskAversion`i'
	gen rev_Nrisk`i'    = rev_NriskAversion`i'
}
gen normGE_NnewCommonRatio = normGE_NcommonRatio
gen rev_NnewCommonRatio    = rev_NcommonRatio

// Alternative definition of "standard" for riskAversionGain: within each copy
// of the data, find the largest negative value (the first risk-loving category)
// and count everything at or above it, with a 0.0001 tolerance, as standard.
// Missing responses are excluded for the same reason as above.
gen AltNorm_NriskAversionGain = .
forvalues s = 1/2 {
	summ NriskAversionGain if NriskAversionGain<0 & stack==`s'
	replace AltNorm_NriskAversionGain = (NriskAversionGain>=`r(max)'-0.0001) ///
		if stack==`s' & !missing(NriskAversionGain)
}

// Rescale the stacked overprecision variable by its weighted standard
// deviation (computed over all stacked observations)
quietly summarize NqualOverPrecise [aweight=weight]
replace NqualOverPrecise = NqualOverPrecise / r(sd)

// Duplicate every N* variable under an SD prefix. The collapse step applies a
// different statistic to these copies than to the N* originals.
foreach stackedVar of varlist N* {
	gen SD`stackedVar' = `stackedVar'
}

// Flip these measures with "1 -" so they read as (EV-CE)/EV. This is done after
// the SD copies are taken, so the copies hold the pre-flip values.
foreach stem in riskGain riskLoss riskMixGainLoss riskCRSure riskCRLottery {
	replace N`stem' = 1 - N`stem'
}

// Reduce the stacked data to a single row of weighted statistics: means of the
// N* variables, of the direction indicators and of the stored correlations,
// and standard deviations of the SDN* copies. Because the two measures of each
// construct sit in the same stacked variable, each statistic is computed over
// both measures jointly.
collapse (mean) N* normGE_N* rev_N* AltNorm_N* corr* (sd) SDN* [aweight=weight]

// Go from one wide row to one row per construct; the construct name ends up
// in the string variable "variable". reshape needs an i() identifier.
gen id = 1
reshape long N SDN normGE_N rev_N AltNorm_N corr, i(id) j(variable) string
drop id

// Give the reshaped stubs descriptive names
rename (N SDN normGE_N rev_N AltNorm_N corr) ///
	(mean stdDev percentNormalGreaterEqual percentReverse alternativePctNormalGreaterEqual correlation)

// Express the "normal direction" share as a percentage
replace percentNormalGreaterEqual = percentNormalGreaterEqual*100
order percentNormalGreaterEqual mean stdDev correlation

// Put the rows in presentation order. To change the order of the table, edit
// this list; constructs that are not listed are dropped.
local displayOrder ///
	reciprocityLow reciprocityHigh trust altruism punishSender punishReceiver ///
	advanInequalityAversion disadInequalityAversion riskGain riskLoss riskMixGainLoss ///
	WTA WTP endowmentEffect riskCRSure riskCRLottery newCommonRatio ambiguityAversion ///
	compoundAversion overestimationIQ overPlacementFacts qualOverPrecise discountRate

gen order = .
local rank = 0
foreach name of local displayOrder {
	local ++rank
	replace order = `rank' if variable=="`name'"
}

// Keep only the listed constructs, sorted by their position in the list
drop if missing(order)
sort order
drop order


// Statistics matrix that will be written to LaTeX (one row per construct)
mkmat mean stdDev percentNormalGreaterEqual correlation, matrix(descstats)

// Companion matrix of the same shape, passed to frmttable's annotate() option:
// a 1 flags a cell that should receive the annotation symbol (the percent
// sign). Only non-missing entries of column 3 (percentNormalGreaterEqual) are
// flagged.
matrix pct = J(rowsof(descstats), colsof(descstats), 0)
forvalues row = 1/`=rowsof(pct)' {
	if !missing(descstats[`row',3]) {
		matrix pct[`row',3] = 1
	}
}

// Write the descriptive-statistics table as a LaTeX fragment.
// The "&" inside the first column title and inside every row title splits the
// label into two LaTeX columns (variable name, description/unit).
// Row titles must stay in the same order as the rows of descstats.
frmttable using "${outputpath}econographicsDescStatistics.tex", ///
	tex replace fragment statmat(descstats) sdec(2 ,2, 0, 2) ///
	annotate(pct) asymbol(\%) ///
	plain nocenter ///
	ctitles("Variable & Description / Unit", "Mean" , "Standard Deviation" , "\% Standard" , "Correlation") ///
	rtitles("Reciprocity: Low & \% of Possible Points Returned" \ ///
	"Reciprocity: High & \% of Possible Points Returned" \ ///
	"Trust & \% of Possible Points Sent" \ ///
	"Altruism (dictator) & \% of Possible Points Sent" \ ///
	"Anti-social Punishment & \% of Possible Points Used" \ ///
	"Pro-social Punishment & \% of Possible Points Used" \ ///
	"Dislike Having More &  \% of Income Forgone for Equal Split" \ ///
	"Dislike Having Less &  \% of Income Forgone for Equal Split" \ ///
	"Risk Aversion: Gains & (EV $-$ CE)/EV" \ ///
	"Risk Aversion: Losses & (EV $-$ CE)/EV" \ ///
	"Risk Aversion: Gain / Loss & (EV $-$ CE)/EV"  \ ///
	"WTA & \% of Expected Value" \ ///
	"WTP & \% of Expected Value" \ ///
	"Endowment Effect & \% of Expected Value" \ ///
	"Risk Aversion: CR Certain & 1 $-$ EV of LE as \% of Sure Amount" \ ///
	"Risk Aversion: CR Lottery & 1 $-$ EV of LE as \% of EV of Lottery" \ ///
	"Common Ratio  & (EV $-$ CE)/EV" \ ///
	"Ambiguity Aversion  & Ambiguous CE $-$ Risky CE" \ ///
	"Compound Aversion  & Compound CE $-$ Risky CE" \ ///
	"Overestimation  & Perceived $-$ Real \# Correct (of 3)" \ ///
	"Overplacement & Perceived $-$ Real percentile" \ ///
	"Overprecision & Standardized subjective precision" \ ///
	"Patience & Monthly discount rate")
